In today’s lab class, we will apply the concepts of regression, correlation, and PCA to data sets that are built into R.
# Set the CRAN mirror used by install.packages() for this session.
# The mirror is addressed over HTTPS so that packages are not downloaded
# through an unencrypted connection.
local({
  repos <- getOption("repos")
  repos["CRAN"] <- "https://cran.r-project.org"
  options(repos = repos)
})
# Load the four built-in example data sets into the workspace in one call
data(list = c("iris", "cars", "mtcars", "USJudgeRatings"))
# Simple linear regression on the cars data:
# stopping distance (response) explained by speed (predictor).
speed <- cars[["speed"]]
distance <- cars[["dist"]]
model.cars <- lm(formula = distance ~ speed)
# Print coefficients, residual summary and R-squared of the fit
summary(object = model.cars)
##
## Call:
## lm(formula = distance ~ speed)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.069 -9.525 -2.272 9.215 43.201
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -17.5791 6.7584 -2.601 0.0123 *
## speed 3.9324 0.4155 9.464 1.49e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 15.38 on 48 degrees of freedom
## Multiple R-squared: 0.6511, Adjusted R-squared: 0.6438
## F-statistic: 89.57 on 1 and 48 DF, p-value: 1.49e-12
# Scatter plot of stopping distance against speed, with the axes limited
# to the observed range of each variable.
plot(x = speed,
     y = distance,
     xlim = c(min(speed), max(speed)),
     ylim = c(min(distance), max(distance)),
     pch = 19,
     main = "Speed of cars vs. Distance taken to stop")
# Annotate the plot with the R-squared of the fitted model; the label is
# positioned near the top-left corner (half a standard deviation right of
# the smallest speed, one tenth below the largest distance).
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
text(x = min(speed) + sd(speed, na.rm = TRUE) / 2,
     y = max(distance) - (max(distance) / 10),
     labels = paste0("R-squared = ",
                     round(x = summary(model.cars)$r.squared, digits = 2)))
# Overlay the fitted regression line
abline(model.cars, col = "red")
# Install corrplot if it is not available yet, then attach it.
# requireNamespace() is used only as the availability check; library() does
# the attaching and stops with an error if the installation did not succeed.
if (!requireNamespace("corrplot", quietly = TRUE)) {
  install.packages("corrplot")
}
## corrplot 0.92 loaded
library(corrplot)
# Pairwise scatter plots of all variables in the USJudgeRatings data
pairs(x = USJudgeRatings)
# Pearson correlation matrix of the USJudgeRatings variables
cor.USJudgeRatings <- cor(USJudgeRatings, method = "pearson")
# Visualise the correlation matrix, one circle per pair of variables
corrplot(cor.USJudgeRatings, method = "circle")
# Install ggplot2 and ggfortify if they are not available yet.
# requireNamespace() is used only as the availability check; the packages
# are attached by the library() calls below, which stop with an error if an
# installation did not succeed.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
if (!requireNamespace("ggfortify", quietly = TRUE)) {
  install.packages("ggfortify")
}
library(ggplot2)
library(ggfortify)
# Numeric measurements of the iris data (every column except Species)
iris.data <- iris[, names(iris) != "Species"]
# Species labels as a factor vector
iris.species <- iris[["Species"]]
# Principal component analysis of the measurements
iris.pca.prcmp <- prcomp(iris.data)
# Plot the PCA result, coloured by species, with labelled loading arrows
autoplot(
  iris.pca.prcmp,
  data = iris,
  colour = 'Species',
  loadings = TRUE,
  loadings.colour = 'blue',
  loadings.label = TRUE,
  loadings.label.size = 3
)
# Install the package e1071 if it is not available yet, then attach it.
# The explicit library() call matters: after a fresh installation nothing
# else attaches the package, and svm() would not be found.
if (!requireNamespace("e1071", quietly = TRUE)) {
  install.packages("e1071")
}
library(e1071)
# Fit a support vector machine classifier predicting species from the
# iris measurements. The type is spelled out in full instead of relying on
# partial matching of "C".
model.svm <- svm(x = iris.data, y = iris.species, type = "C-classification")
# Classical multidimensional scaling (PCoA) of the pairwise distances.
# Points are coloured by species; support vectors are drawn as "+", all
# other observations as "o". The row indices are derived from the data
# rather than hard-coded, so the plot stays correct for other row counts.
plot(cmdscale(dist(iris.data)),
     col = as.integer(iris.species),
     pch = c("o", "+")[(seq_len(nrow(iris.data)) %in% model.svm$index) + 1])
# Predict on the training data and cross-tabulate against the true species
pred <- predict(model.svm, iris.data)
table(pred, iris.species)
## iris.species
## pred setosa versicolor virginica
## setosa 50 0 0
## versicolor 0 48 2
## virginica 0 2 48
Include the table in your answer.
# PCA of the raw mtcars data; prcomp() is called with its defaults here
data(list = "mtcars")
#fix(mtcars)
prcars <- prcomp(x = mtcars)
# Plot the PCA result together with labelled loading arrows
autoplot(object = prcars, data = mtcars, loadings = TRUE, loadings.label = TRUE)
Include the plot in your answer.
# Install heatmaply if it is not available yet. requireNamespace() is used
# only as the availability check; the package is attached by the
# library(heatmaply) call that follows.
if (!requireNamespace("heatmaply", quietly = TRUE)) {
  install.packages("heatmaply")
}
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
## Registered S3 methods overwritten by 'registry':
## method from
## print.registry_field proxy
## print.registry_entry proxy
##
## ======================
## Welcome to heatmaply version 1.4.2
##
## Type citation('heatmaply') for how to cite the package.
## Type ?heatmaply for the main documentation.
##
## The github page is: https://github.com/talgalili/heatmaply/
## Please submit your suggestions and bug-reports at: https://github.com/talgalili/heatmaply/issues
## You may ask questions at stackoverflow, use the r and heatmaply tags:
## https://stackoverflow.com/questions/tagged/heatmaply
## ======================
# Attach heatmaply for the interactive heat maps below
library(heatmaply)
# Heat map of the mtcars values as they are
heatmaply(mtcars, xlab = "Features", ylab = "Cars", main = "Raw data")
# Heat map of the same data after passing it through normalize()
heatmaply(normalize(mtcars), xlab = "Features", ylab = "Cars",
          main = "Data Normalization")
# PCA of mtcars after passing every column through scale()
scale_data <- as.data.frame(scale(x = mtcars))
prcars <- prcomp(x = scale_data)
# Plot the PCA result together with labelled loading arrows
autoplot(object = prcars, data = scale_data, loadings = TRUE, loadings.label = TRUE)
# List the data sets that are available
data()
Choose an appropriate additional dataset. Perform PCA and/or SVM on it and interpret the results.